Notes on the main logic of memcached's hash table operations

The annotated source code below is from the assoc.c file of the memcached project.

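For reference, the hashsize() and hashmask() helpers used throughout the file are small macros defined in the same file. A rough sketch of what they expand to (the exact typedef name may differ between memcached versions):

typedef unsigned long int ub4;        /* 4-byte unsigned quantity, as used in assoc.c */

#define hashsize(n) ((ub4)1 << (n))   /* number of buckets for a given hashpower */
#define hashmask(n) (hashsize(n) - 1) /* bitmask that maps a hash value to a bucket index */
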
/* how many powers of 2's worth of buckets we use */
unsigned int hashpower = HASHPOWER_DEFAULT; /* bucket-count exponent: (1 << hashpower) == number of buckets */

/* Main hash table. This is where we look except during expansion. */
/* The main hash table that stores memcached's key-value items.
 * During expansion the current data is parked under old_hashtable, a new
 * larger array is allocated here, and the existing data is then migrated
 * back bucket by bucket, so this table always holds the most recent view. */
static item** primary_hashtable = 0;

/*
 * Previous hash table. During expansion, we look here for keys that haven't
 * been moved over to the primary yet.
 */
static item** old_hashtable = 0; /* the previous table; only used during expansion */

/* Number of items in the hash table. */
static unsigned int hash_items = 0;

/* Flag: Are we in the middle of expanding now? */
static bool expanding = false;         /* is an expansion currently running? */
static bool started_expanding = false; /* has an expansion been requested yet? */

/*
 * During expansion we migrate values with bucket granularity; this is how
 * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1.
 */
/* Migration proceeds bucket by bucket; expand_bucket is the index of the
 * next bucket to be moved out of old_hashtable. */
static unsigned int expand_bucket = 0;

/*
 * Hash table initialization. The table is an array of bucket head pointers;
 * individual items are allocated elsewhere and linked onto the chain of the
 * bucket they hash to (separate chaining resolves key collisions).
 */
void assoc_init(const int hashtable_init) {
    if (hashtable_init) {
        hashpower = hashtable_init;
    }
    /* allocate the bucket array for the hash table */
    primary_hashtable = calloc(hashsize(hashpower), sizeof(void *));
    if (! primary_hashtable) {
        fprintf(stderr, "Failed to init hashtable.\n");
        exit(EXIT_FAILURE);
    }
    STATS_LOCK();
    stats.hash_power_level = hashpower;
    stats.hash_bytes = hashsize(hashpower) * sizeof(void *);
    STATS_UNLOCK();
}

/*
 * Look up an item by key. hv is the hash value of the key.
 */
item *assoc_find(const char *key, const size_t nkey, const uint32_t hv) {
    item *it;   /* head of the chain in the bucket this key maps to */
    unsigned int oldbucket;

    /* First check whether an expansion is in progress; if so, also check
     * whether this key's bucket is still waiting in old_hashtable. */
    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        it = old_hashtable[oldbucket];   /* bucket not migrated yet: search old_hashtable */
    } else {
        it = primary_hashtable[hv & hashmask(hashpower)]; /* not expanding, or bucket already migrated */
    }

    item *ret = NULL;
    int depth = 0;
    /* walk the chain and compare each item's key; on a match return that item */
    while (it) {
        if ((nkey == it->nkey) && (memcmp(key, ITEM_key(it), nkey) == 0)) {
            ret = it;
            break;
        }
        it = it->h_next;
        ++depth;
    }
    MEMCACHED_ASSOC_FIND(key, nkey, depth);
    return ret;
}
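
One detail worth spelling out: when assoc_expand() runs, hashpower has already been incremented, so hashmask(hashpower - 1) indexes the old (smaller) table while hashmask(hashpower) indexes the new one. Below is a minimal standalone sketch of that routing check; the function name and the concrete values are illustrative only, not from the memcached source.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the check in assoc_find/assoc_insert. `hashpower` is the value
 * AFTER assoc_expand() incremented it, so (hashpower - 1) masks into the
 * old, smaller table. Returns 1 if the key's bucket has not been migrated
 * yet and must still be looked up in old_hashtable. */
static int key_still_in_old_table(uint32_t hv, unsigned int hashpower,
                                  unsigned int expand_bucket) {
    unsigned int oldbucket = hv & ((1u << (hashpower - 1)) - 1);
    return oldbucket >= expand_bucket;
}

int main(void) {
    /* hashpower was bumped from 16 to 17: old table 65536 buckets, new 131072,
     * and the maintenance thread has migrated buckets 0..4095 so far. */
    printf("%d\n", key_still_in_old_table(0x0001F00Du, 17, 4096)); /* bucket 61453 -> 1 (old table) */
    printf("%d\n", key_still_in_old_table(0x00000123u, 17, 4096)); /* bucket 291   -> 0 (primary)   */
    return 0;
}
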
/* returns the address of the item pointer before the key. if *item == 0,
   the item wasn't found */
/*
 * Find the address of the pointer that references the item with this key
 * (either a bucket head or some item's h_next field); used by assoc_delete
 * to unlink the item.
 */
static item** _hashitem_before (const char *key, const size_t nkey, const uint32_t hv) {
    item **pos;
    unsigned int oldbucket;

    /* as in assoc_find, first decide which table to search */
    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        pos = &old_hashtable[oldbucket];
    } else {
        pos = &primary_hashtable[hv & hashmask(hashpower)];
    }

    /* walk the chain until the key matches or we reach the end */
    while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) {
        pos = &(*pos)->h_next;
    }
    return pos;
}

/* grows the hashtable to the next power of 2. */
/* Grow the table to twice the current number of buckets. */
static void assoc_expand(void) {
    /* keep the current table reachable through old_hashtable */
    old_hashtable = primary_hashtable;

    /* allocate a new, twice-as-large array for primary_hashtable */
    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
    if (primary_hashtable) {               /* allocation succeeded */
        if (settings.verbose > 1)
            fprintf(stderr, "Hash table expansion starting\n");
        hashpower++;
        expanding = true;                  /* expansion is now in progress */
        expand_bucket = 0;                 /* start migrating from bucket 0 */
        STATS_LOCK();
        stats.hash_power_level = hashpower;
        stats.hash_bytes += hashsize(hashpower) * sizeof(void *);
        stats.hash_is_expanding = 1;
        STATS_UNLOCK();
    } else {
        primary_hashtable = old_hashtable; /* allocation failed: keep the old table */
        /* Bad news, but we can keep running. */
    }
}

/* Ask the maintenance thread to start an expansion (only once per expansion). */
static void assoc_start_expand(void) {
    if (started_expanding)
        return;
    started_expanding = true;
    pthread_cond_signal(&maintenance_cond);
}

/* Note: this isn't an assoc_update. The key must not already exist to call this */
/* Insert an item into the hash table; the caller must guarantee that the key
 * is not already present. */
int assoc_insert(item *it, const uint32_t hv) {
    unsigned int oldbucket;

//    assert(assoc_find(ITEM_key(it), it->nkey) == 0);  /* shouldn't have duplicately named things defined */

    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        /* expanding, and this bucket has not been migrated yet: insert into old_hashtable */
        it->h_next = old_hashtable[oldbucket];
        old_hashtable[oldbucket] = it;
    } else {
        /* not expanding, or the bucket has already been migrated: insert into primary_hashtable */
        it->h_next = primary_hashtable[hv & hashmask(hashpower)];
        primary_hashtable[hv & hashmask(hashpower)] = it;
    }

    hash_items++;   /* update the item count */

    /* If the item count exceeds 1.5 times the bucket count, request an
     * expansion. The check is skipped while one is already running, so only
     * growth in the number of stored items triggers it. */
    if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
        assoc_start_expand();   /* wake the maintenance thread via its condition variable */
    }

    MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items);
    return 1;
}
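
The trigger above fires once the load factor passes 1.5 items per bucket (not 2/3 of the bucket count). A small sanity check of the arithmetic, assuming the default hashpower of 16 (HASHPOWER_DEFAULT); this snippet is illustrative and not part of assoc.c:

#include <stdio.h>

/* The same expression assoc_insert uses for its expansion threshold. */
int main(void) {
    unsigned int hashpower = 16;                 /* default HASHPOWER_DEFAULT */
    unsigned int buckets   = 1u << hashpower;    /* hashsize(hashpower) = 65536 */
    unsigned int threshold = (buckets * 3) / 2;  /* 98304: expand when hash_items exceeds this */
    printf("buckets=%u, expand when hash_items > %u\n", buckets, threshold);
    return 0;
}
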
/* Remove an item from the hash table. */
void assoc_delete(const char *key, const size_t nkey, const uint32_t hv) {
    /* find the address of the pointer that references the item */
    item **before = _hashitem_before(key, nkey, hv);

    if (*before) {
        item *nxt;
        hash_items--;   /* one fewer item */
        /* The DTrace probe cannot be triggered as the last instruction
         * due to possible tail-optimization by the compiler
         */
        MEMCACHED_ASSOC_DELETE(key, nkey, hash_items);
        /* standard singly-linked-list unlink */
        nxt = (*before)->h_next;
        (*before)->h_next = 0;   /* probably pointless, but whatever. */
        *before = nxt;
        return;
    }
    /* Note: we never actually get here. the callers don't delete things
       they can't find. */
    assert(*before != 0);
}
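
assoc_delete leans on the classic pointer-to-pointer unlink idiom: _hashitem_before returns the address of whichever pointer (bucket head or some item's h_next) currently references the target, so deletion never needs to special-case the head of the chain. A toy standalone illustration of the same idiom follows; node/next/unlink_key are made-up names, not memcached's:

#include <stdio.h>

struct node { int key; struct node *next; };

/* Unlink the first node with the given key, the way
 * _hashitem_before + assoc_delete do it: walk with the ADDRESS of the
 * current link, so the bucket head needs no special case. */
static void unlink_key(struct node **head, int key) {
    struct node **before = head;
    while (*before && (*before)->key != key)
        before = &(*before)->next;
    if (*before) {
        struct node *nxt = (*before)->next;
        (*before)->next = NULL;   /* detach, like assoc_delete's h_next = 0 */
        *before = nxt;            /* splice the chain around the removed node */
    }
}

int main(void) {
    struct node c = {3, NULL}, b = {2, &c}, a = {1, &b};
    struct node *head = &a;
    unlink_key(&head, 2);                  /* remove the middle node */
    for (struct node *p = head; p; p = p->next)
        printf("%d ", p->key);             /* prints: 1 3 */
    printf("\n");
    return 0;
}
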

/* Controls whether the maintenance thread keeps running its main loop. */
static volatile int do_run_maintenance_thread = 1;

#define DEFAULT_HASH_BULK_MOVE 1
int hash_bulk_move = DEFAULT_HASH_BULK_MOVE;

/* Main loop of the hash table maintenance thread. */
static void *assoc_maintenance_thread(void *arg) {

    /* effectively an infinite loop until the process shuts down */
    while (do_run_maintenance_thread) {
        int ii = 0;

        /* Lock the cache, and bulk move multiple buckets to the new
         * hash table. */
        /* Grab the global item lock shared with the worker threads, then
         * migrate a batch of buckets into the new table. */
        item_lock_global();
        mutex_lock(&cache_lock);

        /* By default this loop runs only once per pass (hash_bulk_move == 1),
         * so the global item lock is never held for long and worker threads
         * are not starved. */
        for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
            item *it, *next;
            int bucket;

            /* rehash every item on this bucket's chain into primary_hashtable */
            for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
                next = it->h_next;

                /* recompute the item's bucket in the larger table */
                bucket = hash(ITEM_key(it), it->nkey) & hashmask(hashpower);
                /* push onto the head of the corresponding primary_hashtable bucket */
                it->h_next = primary_hashtable[bucket];
                primary_hashtable[bucket] = it;
            }

            /* the migrated bucket in old_hashtable is now empty */
            old_hashtable[expand_bucket] = NULL;

            /* advance to the next bucket */
            expand_bucket++;
            /* check whether every bucket has been migrated */
            if (expand_bucket == hashsize(hashpower - 1)) {
                expanding = false;
                free(old_hashtable);   /* release the old bucket array */
                STATS_LOCK();
                stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
                stats.hash_is_expanding = 0;
                STATS_UNLOCK();
                if (settings.verbose > 1)
                    fprintf(stderr, "Hash table expansion done\n");
            }
        }

        /* release the cache lock */
        mutex_unlock(&cache_lock);
        /* Release the global item lock so worker threads get a chance to
         * acquire it and serve client requests. */
        item_unlock_global();

        /* not expanding, or the expansion just finished */
        if (!expanding) {
            /* finished expanding. tell all threads to use fine-grained locks */
            /* worker threads are notified through their pipes */
            switch_item_lock_type(ITEM_LOCK_GRANULAR);

            /* Resume the slab rebalancer; hash expansion and slab rebalancing
             * never run at the same time. */
            slabs_rebalancer_resume();

            /* We are done expanding.. just wait for next invocation */
            mutex_lock(&cache_lock);
            started_expanding = false;
            /* Right after startup no expansion is needed yet, so the thread
             * blocks here until assoc_start_expand signals the condition. */
            pthread_cond_wait(&maintenance_cond, &cache_lock);
            /* Before doing anything, tell threads to use a global lock */
            mutex_unlock(&cache_lock);

            /* make sure no slab rebalance is in progress before expanding */
            slabs_rebalancer_pause();

            /* tell worker threads (again via their pipes) to switch to the
             * global item lock */
            switch_item_lock_type(ITEM_LOCK_GLOBAL);
            mutex_lock(&cache_lock);
            /* perform the actual expansion: allocate the bigger table */
            assoc_expand();
            mutex_unlock(&cache_lock);
        }
    }
    return NULL;
}

static pthread_t maintenance_tid;

/* Start the hash table maintenance (expansion) thread. */
int start_assoc_maintenance_thread() {
    int ret;
    char *env = getenv("MEMCACHED_HASH_BULK_MOVE");
    if (env != NULL) {
        hash_bulk_move = atoi(env);
        if (hash_bulk_move == 0) {
            hash_bulk_move = DEFAULT_HASH_BULK_MOVE;
        }
    }
    /* create the thread */
    if ((ret = pthread_create(&maintenance_tid, NULL,
                              assoc_maintenance_thread, NULL)) != 0) {
        fprintf(stderr, "Can't create thread: %s\n", strerror(ret));
        return -1;
    }
    return 0;
}

/* Stop the maintenance thread; only called when the process is shutting down. */
void stop_assoc_maintenance_thread() {
    mutex_lock(&cache_lock);
    do_run_maintenance_thread = 0;
    pthread_cond_signal(&maintenance_cond);
    mutex_unlock(&cache_lock);

    /* Wait for the maintenance thread to stop */
    pthread_join(maintenance_tid, NULL);
}